##Replication Code- Michael O'Neil and Amy Uden; March 2016
##Replicating and extending upon "Polarization and the Decline of the American Floating Voter" by Corwin Smidt
###########################################################################
###########################################################################

#loading packages, etc. for multinomial logit
setwd(dirname(file.choose()))
require(foreign)
require(mlogit)

##loading various packages and dependencies for bivariate probit
source("https://bioconductor.org/biocLite.R")
biocLite()
sbiocLite("graph")
biocLite("Rgraphviz")
biocLite("Rcpp")
library("Zelig")
library("ZeligChoice")
library("foreign")

###########################################################################
###########################################################################
#Extensions on Table 1 with Relig Variables and interactions:

load("modifiedcdf.RData")
summary(x)
head(x)

#################################################
#prepare data for fitting bivariate probit model

x<-cbind.data.frame(x["ambivalent"],x["undecided"],x["education"],x["age"],x["female"],x["sumissue"],x["black"],x["totalmentions"],x["pidstrength2"],x["changerdpi"],x["reelectcamp"],x["year"],x["weight"])

#check if recurisive and drop missing data
is.recursive(x)          
y<-as.data.frame(x[complete.cases(x),]) 

##checking if this is correct form###
summary(y)               
head(y)     

##Fit model with no weighting to replicate
out<- zelig(cbind(y$ambivalent, y$undecided) ~ y$education + y$age + y$female + y$sumissue + y$black + y$totalmentions + y$pidstrength2 + y$changerdpi +y$reelectcamp,
            model = "blogit", data = y)

#initialize variables to bootstrap coefficients and standard errors
bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs  <- matrix(NA, nrow = reps, ncol = 21)

set.seed(1004)

clusterboot <- for(i in 1:reps){
  
  bootdat <- NULL  
  model.sim <- NULL
  
  ##pull clusters by years with replace
  clusters <- names(table(y$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  
  for(j in 1:length(inds)){
    onecluster <- y[as.character(y$year) == inds[j], ]
    
    bootdat <- rbind(bootdat, onecluster)
  }
  
  model.sim <- zelig(cbind(bootdat$ambivalent, bootdat$undecided) ~ bootdat$education + bootdat$age + bootdat$female + bootdat$sumissue + bootdat$black + bootdat$totalmentions + bootdat$pidstrength2 + bootdat$changerdpi +bootdat$reelectcamp,
                     model = "bprobit", data = bootdat)
  
  wat<- seq(.01,1,.01)
  setx(model.sim)
  coeffs.long <- as.data.frame(model.sim$getcoef())[ ,1]
  
  coeffs[i, ] <- t(as.matrix(coeffs.long[1:21]))
  
}

save(coeffs, file = "BC_Coeffs_Table1_32116.Rdata")
load("BC_Coeffs_Table1_32116.Rdata")

##get mean of coeffs
e.coeff<-NULL
for(i in 1:ncol(coeffs)){ 
  v <-coeffs[, i]  
  e.coeff[i] <- mean(v)
}

e.coeff

##get standard errors from bootstrap
se <- NULL

for(i in 1:ncol(coeffs)){ 
  vector <-coeffs[, i]  
  se[i] <- sd(vector)
}

se

#p-values
t<-NULL
pv<-NULL

for(i in 1:21){
  mod <- e.coeff[i]
  mod.se <-  se[i]
  t <- (mod-0)/mod.se
  pv[i] <- 2*(1 - pnorm(abs(t), 0, 1))
}

pv

#bias
#Stata nebulously outputs a bias correction, possibly for bootstrapping.
#Note that this correction is not to be subtracted as the author does in the Stata help files.
#As this is a replication, the correction is here.

bias.correction<-NULL
bias.correction<-as.data.frame(out$getcoef())[,1]-t(as.data.frame(e.coeff))
bc.coeff<-e.coeff+bias.correction
t.bias.correction <- t(bias.correction)

c.coeffs<-coeffs

for(i in 1:21){
  c.coeffs[i, ] <-  coeffs[i, ] + bias.correction
}
head(c.coeffs)
#previous section with corrections;

bc.coeff<-NULL
for(i in 1:ncol(c.coeffs)){ 
  v <-c.coeffs[, i]  
  bc.coeff[i] <- mean(v)
}

bc.coeff

##get standard errors from bootstrap
bc.se<-NULL
for(i in 1:ncol(c.coeffs)){ 
  vector <-c.coeffs[, i]  
  bc.se[i] <- sd(vector)
}

bc.se

#p-values
bc.pv<-NULL
for(i in 1:21){
  bc.mod <- c.coeffs[i]
  bc.mod.se <-  bc.se[i]
  bc.t <- (bc.mod-0)/bc.mod.se
  bc.pv[i] <- 2*(1 - pnorm(abs(bc.t), 0, 1))
}

bc.pv

##############################################
##cross-era effects

#These means are pulled from the the source Stata code and cannot be replicated through any permutation of mean or weighted mean, >= or >, or grouping by ambivalent v. undecided.
#Using these values against the 2000 and above values they should be of pre 1980 existant in Stata code with no source
#Given the nebulous origin of the cut-off values and ad hoc rescaling to normal distribution of a non-normal DGP this is a prime canidate for extension.

#aeducation<-0
#aage<-0
#afem<-0
#asumissue<-0
#ablack<-0
#atotalmentions<-0
#apidstrength2<-0
#achangerdpi<-0
#areelectcamp<-0

#for(i in 1:2000){

#yed80<-y
#yed80$education[y$year >1980]<-1.08453
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*yed80$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$education[y$year >1980]<-1.762244
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*yed80$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#aeducation[y$year[i]>=2000]<-aeducation+ma-mi

#yed80<-y
#yed80$age[y$year >1980]<-45.37945
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*yed80$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$age[y$year >1980]<-46.31365
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*yed80$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# aage[y$year[i]>=2000]<-aage+ma-mi

#yed80<-y
#yed80$female[y$year >1980]<-.5581034
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*yed80$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$female[y$year >1980]<-.5414663
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*yed80$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

#  afem[y$year[i]>=2000]<-afem+ma-mi

#yed80<-y
#yed80$sumissue[y$year >1980]<-.4429074
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*yed80$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$sumissue[y$year >1980]<-.6405873
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*yed80$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# asumissue[y$year[i]>=2000]<-asumissue+ma-mi

#yed80<-y
#yed80$black[y$year >1980]<-.5581034
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*yed80$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$black[y$year >1980]<-.5414663
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*yed80$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# ablack[y$year[i]>=2000]<-ablack+ma-mi

#yed80<-y
#yed80$totalmentions[y$year >1980]<-8.23119
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*yed80$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$totalmentions[y$year >1980]<-8.249172
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*yed80$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# atotalmentions[y$year[i]>=2000]<-atotalmentions+ma-mi

#yed80<-y
#yed80$pidstrength2[y$year >1980]<-1.198608
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*yed80$totalmentions + c.coeffs[i,16]*yed80$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$pidstrength2[y$year >1980]<-1.204974
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*yed80$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# apidstrength2[y$year[i]>=2000]<-apidstrength2+ma-mi

#yed80<-y
#yed80$changerdpi[y$year >1980]<-.4429074
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*yed80$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*yed80$changerdpi + c.coeffs[i,20]*y$reelectcamp)
#yed80$changerdpi[y$year >1980]<-.6405873
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*yed80$changerdpi + c.coeffs[i,20]*y$reelectcamp)

# achangerdpi[y$year[i]>=2000]<-achangerdpi+ma-mi

#yed80<-y
#yed80$reelectcamp[y$year >1980]<-.2857143
#mi<-min(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*yed80$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*yed80$reelectcamp)
#yed80$reelectcamp[y$year >1980]<-1/3
#ma<-max(c.coeffs[i,1]+c.coeffs[i,4]*y$education + c.coeffs[i,6]*y$age + c.coeffs[i,8]*y$female + c.coeffs[i,10]*y$sumissue + c.coeffs[i,12]*y$black + c.coeffs[i,14]*y$totalmentions + c.coeffs[i,16]*y$pidstrength2  + c.coeffs[i,18]*y$changerdpi + c.coeffs[i,20]*yed80$reelectcamp)

# areelectcamp[y$year[i]>=2000]<-areelectcamp+ma-mi

#print(i)
#}
##age::sumissue interaction

library("Zelig")
library("ZeligChoice")
library("foreign")

setwd("C:/Users/mio540/Desktop")
x<-read.dta("Newmodifiedcdf_final12.dta")

y<-cbind.data.frame(x["ambivalent"],x["undecided"],x["education"],x["age"],x["female"],x["sumissue"],x["black"],x["totalmentions"],x["pidstrength2"],x["changerdpi"],x["reelectcamp"],x["year"],x["weight"])
r<-cbind.data.frame(x["ambivalent"],x["undecided"],x["education"],x["age"],x["female"],x["sumissue"],x["black"],x["totalmentions"],x["pidstrength2"],x["changerdpi"],x["reelectcamp"],x["religserv"],x["year"],x["weight"]) #,x["religfine"]
q<-cbind.data.frame(x["ambivalent"],x["undecided"],x["education"],x["age"],x["female"],x["sumissue"],x["black"],x["totalmentions"],x["pidstrength2"],x["changerdpi"],x["reelectcamp"],x["religserv"],x["religfine"],x["year"],x["weight"] )
#check if recurisive and drop missing data
is.recursive(y)          
y<-as.data.frame(y[complete.cases(y),]) 
r<-as.data.frame(r[complete.cases(r),]) 
q<-as.data.frame(q[complete.cases(q),]) 

##checking if this is correct form###
summary(q)               
head(q)     

##Fit model with no weighting to replicate
out<- zelig(cbind(y$ambivalent, y$undecided) ~ y$education + y$age + y$female + y$sumissue + y$black + y$totalmentions + y$pidstrength2 + y$changerdpi +y$reelectcamp +y$age*y$sumissue,
            model = "bprobit", data = y)

#initialize variables to bootstrap coefficients and standard errors
bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 100
coeffs  <- matrix(NA, nrow = reps, ncol = 23)

set.seed(1004)

clusterboot <- for(i in 1:reps){
  
  bootdat <- NULL  
  model.sim <- NULL
  
  ##pull clusters by years with replace
  clusters <- names(table(y$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  
  for(j in 1:length(inds)){
    onecluster <- y[as.character(r$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  
  
  model.sim <- zelig(cbind(bootdat$ambivalent, bootdat$undecided) ~ bootdat$education + bootdat$age + bootdat$female + bootdat$sumissue + bootdat$black + bootdat$totalmentions + bootdat$pidstrength2 + bootdat$changerdpi +bootdat$reelectcamp 
                     + bootdat$sumissue*bootdat$age
                     #+factor(bootdat$religserv)  ##uncomment for different model specifications
                     #+factor(bootdat$religserv) + bootdat$sumissue*bootdat$age
                     #+factor(religfine3)
                     ,
                     model = "bprobit", data = bootdat)
  
  #religserv not significant as a predictor of ambiv or und. .group undecided 3 is only to weakly reach significance at .05
  coeffsf.long <- as.data.frame(model.sim$getcoef())[ ,1]
  
  coeffs[i, ] <- t(as.matrix(coeffsf.long[1:23]))
  
}

e.coeff<-NULL
for(i in 1:ncol(coeffs)){ 
  v <-coeffs[, i]  
  e.coeff[i] <- mean(v)
}

e.coeff

##get standard errors from bootstrap
se <- NULL

for(i in 1:ncol(coeffs)){ 
  vector <-coeffs[, i]  
  se[i] <- sd(vector)
}

se

#p-values
t<-NULL
pv<-NULL

for(i in 1:23){
  mod <- e.coeff[i]
  mod.se <-  se[i]
  t <- (mod-0)/mod.se
  pv[i] <- 2*(1 - pnorm(abs(t), 0, 1))
}

pv

#bias correction for bootstrapping

bias.correction<-NULL
bias.correction<-as.data.frame(out$getcoef())[,1]-t(as.data.frame(e.coeff))
bc.coeff<-e.coeff+bias.correction
t.bias.correction <- t(bias.correction)

c.coeffs<-coeffs

for(i in 1:23){
  c.coeffs[i, ] <-  coeffs[i, ] + bias.correction
}
head(c.coeffs)
#previous section with corrections;

bc.coeff<-NULL
for(i in 1:ncol(c.coeffs)){ 
  v <-c.coeffs[, i]  
  bc.coeff[i] <- mean(v)
}

bc.coeff

bc.se<-NULL
for(i in 1:ncol(c.coeffs)){ 
  vector <-c.coeffs[, i]  
  bc.se[i] <- sd(vector)
}

bc.se

#p-values
bc.pv<-NULL
for(i in 1:23){
  bc.mod <- c.coeffs[i]
  bc.mod.se <-  bc.se[i]
  bc.t <- (bc.mod-0)/bc.mod.se
  bc.pv[i] <- 2*(1 - pnorm(abs(bc.t), 0, 1))
}

bc.pv

header.names<-c("Ambivalent","Undecided")
var.names<-c("Constant", "Education", "Age", "Female", "Sum Issue", "Black", "Total Mentions", "Pid Strength2", "Changer dpi", "Reelection Campaign","Age::Sumissue")
ambivelence.coeff<-bc.coeff[c(1,4,6,8,10,12,14,16,18,20,22)]
undecided.coeff<-bc.coeff[c(2,5,7,9,11,13,15,17,19,21,23)]
ambivelence.se<-bc.se[c(1,4,6,8,10,12,14,16,18,20,22)]
undecided.se<-bc.se[c(2,5,7,9,11,13,15,17,19,21,23)]
n<-length(t(y))/length(y)
Elections<-13
ll<- -16090.84
rho<-0.1075

library(xtable)
xtable(cbind(var.names,round(ambivelence.coeff,6),round(undecided.coeff,6)))

########################################################################################################################################
########################################################################################################################################

##Expansions on Table 3 with Relig variables, interactions, and cross-era comparisons:

##Data preliminaries from original model:
statadata <- read.dta("Newmodifiedcdf_final12.dta")
summary(statadata)

#Replacing weights with post-election weights where they exist, per Smidt's .do file for his original model.
for(i in 1:nrow(statadata)){
  statadata$weight[i] <- if(is.na(statadata$postweight[i])){statadata$weight[i]}else{statadata$postweight[i]}  
}
summary(statadata$weight)

## Creating collapsed specification of religfine to examine denominations of interest with less noise. 
statadata$religfine2 <- statadata$religfine 
statadata$religfine2[statadata$religfine2 == 13] <- 14
statadata$religfine2[statadata$religfine2 == 2] <- 20
statadata$religfine2[statadata$religfine2 == 3] <- 5
statadata$religfine2[statadata$religfine2 == 4] <- 5
statadata$religfine2[statadata$religfine2 == 7] <- 5   
statadata$religfine2[statadata$religfine2 == 8] <- 5  
statadata$religfine2[statadata$religfine2 == 9] <- 5
table(statadata$religfine2)

##Checking balance from original dataset to added variable datasets:
statadata.orig <- statadata[  , c(1, 3, 6,  46, 47, 48, 49, 51, 55, 57, 58, 60, 72, 73, 80)]
head(statadata.orig)
samp.orig <- na.omit(statadata.orig)
nrow(samp.orig)

statadata3 <- statadata[  , c(1, 3, 6, 9, 12, 46, 47, 48, 49, 51, 55, 57, 58, 60, 72, 73, 80, 93)]
statadata3 <- as.data.frame(statadata3)
head(statadata3)
samp.comp <-na.omit(statadata3)
nrow(samp.comp)

summary(samp.orig)
summary(samp.comp)

##Balance checks for 1984 missingness:
statadata.orig.r <- statadata[  , c(1, 3, 6, 9, 12, 46, 47, 48, 49, 51, 55, 57, 58, 60, 72, 73, 80, 93)]
summary(statadata.orig.r[statadata.orig.r$year == 1984, ]) 
nrow(statadata.orig.r[statadata.orig.r$year == 1984, ]) 

######################################################
##Create new catvoter variables for use as table 3 DV

table(statadata3$catvoter2)
statadata3$catvoter2 <- as.numeric(statadata3$catvoter2)
#This version base is: 1=Repeat Non-voter; 2=Standpatter; 3=Floating Voter; 4=Surger & Decliner.

statadata3$Newcatvoter2 <- statadata3$catvoter2
statadata3$Newcatvoter2[statadata3$Newcatvoter2== 2] <- 0
statadata3$Newcatvoter2[statadata3$Newcatvoter2== 3] <-2
statadata3$Newcatvoter2[statadata3$Newcatvoter2== 4] <- 3
table(statadata3$Newcatvoter2) #Check: New frequencies (recoded 0-3 with Standpatter as base).
#0=Standpatter; 1=Repeat Non-voter; 2=Floating Voter; 3=Surger & Decliner.

##Alternative coding. Use this to compare SPs
statadata3$Newcatvoter3 <- statadata3$catvoter2
statadata3$Newcatvoter3[statadata3$Newcatvoter3== 1] <- 0
statadata3$Newcatvoter3[statadata3$Newcatvoter3== 2] <- 1
statadata3$Newcatvoter3[statadata3$Newcatvoter3== 3] <- 2
statadata3$Newcatvoter3[statadata3$Newcatvoter3== 4] <- 3
table(statadata3$Newcatvoter3)
#1=Standpatter; 0=Repeat Non-voter; 2=Floating Voter; 3=Surger & Decliner.
head(statadata3)

##Reweight and create actual sample
samp <- na.omit(statadata3)
nrow(samp)
sum <- sum(samp$weight)
samp$weight2 <- samp$weight*(nrow(samp)/sum)
head(samp$weight2)
sum(samp$weight2) 
head(samp)

##Add Religion Variable:
samp$religserv1 <- samp$religserv
samp$religserv1[samp$religserv1 == 5] <- 6
samp$religserv1[samp$religserv1 == 4] <- 7
samp$religserv1[samp$religserv1 == 2] <- 4
samp$religserv1[samp$religserv1 == 1] <- 5
samp$religserv1[samp$religserv1 == 6] <- 1
samp$religserv1[samp$religserv1 == 7] <- 2
table(samp$religserv1)

samp$religfine3 <- samp$religfine2 #recoding religious denomination variable
samp$religfine3[samp$religfine3 == 14] <- 5
table(samp$religfine3)

save(samp, file = "SampleData.Rdata")
#load("SampleData.Rdata")

####################################################################################################################
####################################################################################################################
#Model assumption checks for new variables:
##Checking for perfect prediction and small sample cells:

table(samp$Newcatvoter2, samp$religfine2)  #no perfect prediction, some concerns on category 5 for religfine2 small cell
table(samp$Newcatvoter2, samp$religfine3) #using further collapsed religfine3 variable would fix this.
table(samp$Newcatvoter2, samp$religserv) #no issues here

###########################################################################
##Expansions on Smidt's Table 3 with Relig Variables and interactions:
## Running non-bootstrapped model as a point of comparison to Smidt-- original model:

##Smidt's original specification: used in comparing sensititivy of across-era effect cutpoints
##Not in use in Table 3 expansions, expansions use recode for baseline as "repeat non-voter" in order to compare Standpatter and FV both as Quantities of Interest.
frame3 <- mlogit.data(samp, shape="wide", choice="Newcatvoter2")
model3 <- mlogit(Newcatvoter2 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame3, weights = weight2)
summary(model3)
length(model3$coefficients)

##Re-leveled original specification: Repeat Non-Voter as baseline. Coefficients will differ, but across-era effects within voter group will not. 
frame4 <- mlogit.data(samp, shape="wide", choice="Newcatvoter3")
model3 <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame4, weights = weight2)
summary(model3)

##Interaction on era for same model- for use in exploring across-era effects:
samp$era <- samp$year
samp$era[samp$year < 1990] <- 0
samp$era[samp$year > 1990] <- 1
table(samp$era)
table(frame4$weight2)

model3I <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + era, data = frame4, weights = weight2)
summary(model3I)

## Interactive model (non-bootstrapped): original covariates only plus age:sumissue interaction
model3b <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + age*sumissue, data = frame4, weights = weight2)
summary(model3b)

##Adding Religion Variables (non-bootstrapped): original covariates only plus religserv (measure of religiosity)
model3c <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1, data = frame4, weights = weight2)
summary(model3c)
model3c$coefficients[1:42]

##Adding Religion Variables (non-bootstrapped): original covariates only plus religfine (measure of religious affiliation)
model3d <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + factor(religfine3), data = frame4, weights = weight2)
summary(model3d)
model3d$coefficients[1:48]

#Adding both Religion Variables (non-bootstrapped): original covariates plus religserv1, religfine
model3e <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1 + factor(religfine3), data = frame4, weights = weight2)
summary(model3e) 

##Both religion variables plus religserv1*sumissue
model3f <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1 + factor(religfine3)+ age*sumissue, data = frame4, weights = weight2)
summary(model3f) 
model3f$coefficients[1:54]

####################################################################################################################################
####################################################################################################################################
##Bootstrap each of these models, with relevant SD, pvalue, and expected value calculations following:

####################################################################################################################################
####################################################################################################################################
##Model 1- Smidt's original 

bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs1  <- matrix(NA, nrow = reps, ncol = 39)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs1[i, ] <- t(as.matrix(coeffs.long[1:39]))
}

save(coeffs1, file = "NewCoeffs1.Rdata")
load("NewCoeffs1.Rdata")
coeffsB<- as.matrix(coeffs1)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}
##Model P-Values:
pvalue <- NULL
for(i in 1:ncol(coeffsB)){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

##Bias calculations:
original.coeffs <- as.data.frame(model3$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:39])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

##Table of coefficients, bootstrapped SEs, and p-values (where the 0-2 difference is the Standpatter to Floating Voter MNL estimate):
table <- cbind( names(model3$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel1.Rdata")

##Predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 19, 21)]
intercept <- rep(1, nrow(part.samp)) 
part.samp <- cbind(intercept, part.samp) 
dim(part.samp)
head(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter2", "weight2")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), ncol(coeffsB))
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL
head(part.samp)

for(i in year){
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 15, 16)]
  pred.storageF<- NULL
  pred.storageS<- NULL
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 14:26])/(1 + exp(calcdata %*% bcsimbetas[j, 1:13]) + exp(calcdata%*% bcsimbetas[j, 14:26]) +  exp(calcdata %*% bcsimbetas[j, 27:39]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:13])/(1 + exp(calcdata %*% bcsimbetas[j, 1:13]) + exp(calcdata%*% bcsimbetas[j, 14:26]) +  exp(calcdata %*% bcsimbetas[j, 27:39]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  meanvecF[i] <- mean(pred.storageF)
  meanvecS[i] <- mean(pred.storageS) 
  sdvecF[i] <- sd(pred.storageF)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS

predict.table1 <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table1, file = "PredictTable1.Rdata")

##############################################################################################################
##############################################################################################################
##Model 2- age:sumissue Interaction  

bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs2  <- matrix(NA, nrow = reps, ncol = 42)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + age*sumissue, data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs2[i, ] <- t(as.matrix(coeffs.long[1:42]))
}
save(coeffs2, file = "NewCoeffs2.Rdata")
load("NewCoeffs2.Rdata")

coeffsB<- as.matrix(coeffs2)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}
##Model P-Values:
pvalue <- NULL
for(i in 1:ncol(coeffsB)){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

##Bias calculations:
original.coeffs <- as.data.frame(model3b$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:42])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

##Table of coefficients, bootstrapped SEs, and p-values (where the 0-2 difference is the Standpatter to Floating Voter MNL estimate):
length(names(model3b$coefficients))
table <- cbind( names(model3b$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel2.Rdata")

######################################################
##Predicted values for Model 2: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 20, 21)] 
intercept <- rep(1, nrow(part.samp)) 
interaction <- samp$age * samp$sumissue
part.samp <- cbind(intercept, part.samp, interaction) 
dim(part.samp)
head(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter3", "weight2", "age:sumissue")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), ncol(coeffsB))
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}

bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL
head(part.samp)

##Need to get separate weighted means for each year's interaction term:

for(i in year){
  
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 15, 16, 17)]
  interactmean <- wmeans[5] * wmeans[10]
  calcdata <- c(calcdata, interactmean)
  names(calcdata)[14] <- "sumissue:age"
  
  pred.storageF<- NULL
  pred.storageS<- NULL
  
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 15:28])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:14])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
  
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS

predict.table2I <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table2I, file = "PredictTable2I.Rdata")

#######################################################
##predicting on only a random half of my data for cross-validation of this model

head(part.samp)
nrow(part.samp)
ind.vec <- sample(1:11249, 5625 , replace = F)
part.samp2 <- part.samp[ind.vec, ]
head(part.samp2)
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL

for(i in year){
  wmeans <- apply(part.samp2[part.samp2$year == i, ], 2,  weighted.mean, w = part.samp2$weight2[part.samp2$year == i])
  calcdata <- wmeans[ - c(2, 15, 16, 17)]
  interactmean <- wmeans[5] * wmeans[10]
  calcdata <- c(calcdata, interactmean)
  names(calcdata)[14] <- "sumissue:age"
  pred.storageF<- NULL
  pred.storageS<- NULL
  
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 15:28])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:14])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS

predict.table2Ib <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table2Ib, file = "PredictTable2Ib.Rdata")

###################################################
#################################################
##Model 3: Religserv1 addition

bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs3 <- matrix(NA, nrow = reps, ncol = 42)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1, data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs3[i, ] <- t(as.matrix(coeffs.long[1:42]))
}
save(coeffs3, file = "NewCoeffs3.Rdata")
load("NewCoeffs3.Rdata")

coeffsB<- as.matrix(coeffs3)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}
##Model P-Values:
pvalue <- NULL
for(i in 1:ncol(coeffsB)){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

##Bias calculations:
original.coeffs <- as.data.frame(model3c$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:42])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

#Table of coefficients, bootstrapped SEs, and p-values (where the 0-2 difference is the Standpatter to Floating Voter MNL estimate):
table <- cbind( names(model3c$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel3.Rdata")

##Predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 19, 21, 4)]
intercept <- rep(1, nrow(part.samp)) 
part.samp <- cbind(intercept, part.samp) 
dim(part.samp)
head(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter2", "weight2", "age::sumissue")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), ncol(coeffsB))
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL
head(part.samp)

for(i in year){
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 15, 16)]
  pred.storageF<- NULL
  pred.storageS<- NULL
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 15:28])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:14])/(1 + exp(calcdata %*% bcsimbetas[j, 1:14]) + exp(calcdata%*% bcsimbetas[j, 15:28]) +  exp(calcdata %*% bcsimbetas[j, 29:42]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS
predict.table3 <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table3, file = "PredictTable3.Rdata")

##################################################################################################################
##################################################################################################################
##Model 4: factor(Religfine3) addition
bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs4  <- matrix(NA, nrow = reps, ncol = 48)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + factor(religfine3), data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs4[i, ] <- t(as.matrix(coeffs.long[1:48]))
}
save(coeffs4, file = "NewCoeffs4.Rdata")
load("NewCoeffs4.Rdata")

coeffsB<- as.matrix(coeffs4)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}

##Model P-Values:
pvalue <- NULL
for(i in 1:ncol(coeffsB)){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

##Bias calculations:
original.coeffs <- as.data.frame(model3d$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:48])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

table <- cbind( names(model3d$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel4.Rdata")

#Predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
length(model3d$coefficients)
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 19, 21)] 
intercept <- rep(1, nrow(part.samp)) 

samp$fac11 <- rep(0, nrow(samp))
samp$fac11[samp$religfine3 == 11] <- 1
samp$fac12 <- rep(0, nrow(samp))
samp$fac12[samp$religfine3 == 12] <- 1
samp$fac20 <- rep(0, nrow(samp))
samp$fac20[samp$religfine3 == 20] <- 1

part.samp <- cbind(intercept, part.samp, samp$fac11, samp$fac12, samp$fac20) 
dim(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter2", "weight2", "fac11", "fac12", "fac20")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), ncol(coeffsB))
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL
head(part.samp)

for(i in year){
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 15, 16)]
  pred.storageF<- NULL
  pred.storageS<- NULL
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 17:32])/(1 + exp(calcdata %*% bcsimbetas[j, 1:16]) + exp(calcdata%*% bcsimbetas[j, 17:32]) +  exp(calcdata %*% bcsimbetas[j, 33:48]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:16])/(1 + exp(calcdata %*% bcsimbetas[j, 1:16]) + exp(calcdata%*% bcsimbetas[j, 17:32]) +  exp(calcdata %*% bcsimbetas[j, 33:48]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS
predict.table4 <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table4, file = "PredictTable4.Rdata")

##################################################################################################################
##################################################################################################################
##Model 5: Relig additions (both)
bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs5  <- matrix(NA, nrow = reps, ncol = 51)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1 + factor(religfine3), data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs5[i, ] <- t(as.matrix(coeffs.long[1:51]))
}

save(coeffs5, file = "NewCoeffs5.Rdata")
load("NewCoeffs5.Rdata")

coeffsB<- as.matrix(coeffs5)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}
##Model P-Values:
pvalue <- NULL
for(i in 1:51){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}
pvalue
names(pvalue) <- names(model3e$coefficients)

##Bias calculations:
original.coeffs <- as.data.frame(model3e$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:51])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

table <- cbind( names(model3e$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel5.Rdata")

#Predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 5, 10, 17, 3, 16, 14, 13, 19, 21, 4)]
dim(part.samp)
intercept <- rep(1, nrow(part.samp)) 
samp$fac11 <- rep(0, nrow(samp))
samp$fac11[samp$religfine3 == 11] <- 1
samp$fac12 <- rep(0, nrow(samp))
samp$fac12[samp$religfine3 == 12] <- 1
samp$fac20 <- rep(0, nrow(samp))
samp$fac20[samp$religfine3 == 20] <- 1
part.samp <- cbind(intercept, part.samp, samp$fac11, samp$fac12, samp$fac20) 
dim(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter2", "weight2", "religserv1", "fac11", "fac12", "fac20")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), 51)
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL

for(i in year){
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 15, 16)]
  pred.storageF <- NULL
  pred.storageS <- NULL
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 18:34])/(1 + exp(calcdata %*% bcsimbetas[j, 1:17]) + exp(calcdata%*% bcsimbetas[j, 18:34]) +  exp(calcdata %*% bcsimbetas[j, 35:51]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:17])/(1 + exp(calcdata %*% bcsimbetas[j, 1:17]) + exp(calcdata%*% bcsimbetas[j, 18:34]) +  exp(calcdata %*% bcsimbetas[j, 35:51]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1)) 
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS
predict.table5 <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table5, file = "PredictTable5.Rdata")

##################################################
##Model 6: relig additions (both) + age*sumissue
head(samp)
table(samp$religserv1) 
samp$agesumissue <- samp$age*samp$sumissue

bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs6  <- matrix(NA, nrow = reps, ncol = 54)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(samp$year) == inds[j], ]
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv1 + factor(religfine3) + age*sumissue , data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs6[i, ] <- t(as.matrix(coeffs.long[1:54]))
}
head(coeffs6)
save(coeffs6, file = "NewCoeffs6.Rdata")
load("NewCoeffs6.Rdata")

coeffsB<- as.matrix(coeffs6)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)
expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL
for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}
##Model P-Values:
pvalue <- NULL
for(i in 1:54){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}
pvalue
names(pvalue) <- names(model3f$coefficients)

##Bias calculations:
original.coeffs <- as.data.frame(model3f$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:54])
expected.vector <- as.vector(expected.coeffs)
bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

table <- cbind( names(model3f$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table
save(table, file = "CoeffsTableModel6.Rdata")

#Predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients
head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 27, 24, 25, 26, 29, 20, 21)]
dim(part.samp)
intercept <- rep(1, nrow(part.samp)) 
part.samp <- cbind(intercept, part.samp) 
head(part.samp)
names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "religserv1", "fac11", "fac12", "fac20", "agesumissue", "Newcatvoter3", "weight2")

bc.coeffs2 <- matrix(NA, nrow(coeffsB), 54)
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values for each year's mean:
year <- c("1960", "1964", "1968", "1972", "1976", "1980", "1984", "1988", "1992", "1996", "2000", "2004")
meanvecF <- NULL
sdvecF <- NULL
pvalvecF <- NULL
meanvecS <- NULL
sdvecS <- NULL
pvalvecS <- NULL
head(part.samp[ , c(2, 20, 21)])
head(part.samp)

for(i in year){
  wmeans <- apply(part.samp[part.samp$year == i, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year == i])
  calcdata <- wmeans[ - c(2, 19, 20, 21)]
  interactmean <- wmeans[5] * wmeans[10]
  calcdata <- c(calcdata, interactmean)
  names(calcdata)[14] <- "sumissue:age"  
  
  pred.storageF <- NULL
  pred.storageS <- NULL
  for(j in 1:nrow(coeffsB)){ 
    predictedF <- exp(calcdata %*% bcsimbetas[j, 19:36])/(1 + exp(calcdata %*% bcsimbetas[j, 1:18]) + exp(calcdata%*% bcsimbetas[j, 19:36]) +  exp(calcdata %*% bcsimbetas[j, 37:54]))
    predictedS <- exp(calcdata %*% bcsimbetas[j, 1:18])/(1 + exp(calcdata %*% bcsimbetas[j, 1:18]) + exp(calcdata%*% bcsimbetas[j, 19:36]) +  exp(calcdata %*% bcsimbetas[j, 37:54]))
    pred.storageF[j] <- predictedF
    pred.storageS[j] <- predictedS
  }
  meanvecF[i] <- mean(pred.storageF)
  sdvecF[i] <- sd(pred.storageF)
  meanvecS[i] <- mean(pred.storageS)
  sdvecS[i] <- sd(pred.storageS)
  ate1 <- mean(pred.storageF)
  ate.SE1 <-  sd(pred.storageF)
  tstat1 <- (ate1-0)/ate.SE1
  pvalvecF[i] <- 2*(1 - pnorm(abs(tstat1), 0, 1))
  ate2 <- mean(pred.storageS)
  ate.SE2 <-  sd(pred.storageS)
  tstat2 <- (ate2-0)/ate.SE2
  pvalvecS[i] <- 2*(1 - pnorm(abs(tstat2), 0, 1))
}

lowerF <- meanvecF - sdvecF
upperF <- meanvecF + sdvecF
lowerS <- meanvecS - sdvecS
upperS <- meanvecS + sdvecS
predict.table6 <- cbind(meanvecF, sdvecF, pvalvecF, lowerF, upperF, meanvecS, sdvecS, pvalvecS, lowerS, upperS) 
save(predict.table6, file = "PredictTable6.Rdata")

###############################################################################################
################################################################################################
##First differences from this model:

##Religiosity pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
# never attends
calcsamp$religserv1 <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageN80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageN80[i] <- predictedF
}

serv.80.Never.FV <- mean(pred.storageN80)
sd.serv.80.Never.FV <- sd(pred.storageN80)

##Religiosity pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
# attends weekly
calcsamp$religserv1 <- rep(3, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageW80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageW80[i] <- predictedF
}

serv.80.Weekly.FV <- mean(pred.storageW80)
sd.serv.80.Weekly.FV <- sd(pred.storageW80)

#First Differences for church attendance: pre-80 era
FD.80.Religiosity <- serv.80.Never.FV - serv.80.Weekly.FV
difference.FD.80.Religiosity <- pred.storageN80 - pred.storageW80
sd(difference.FD.80.Religiosity)
sd.80.Religiosity <- 0.1774032

###################################################
##Religiosity post-2000 only:
calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
# never attends
calcsamp$religserv1 <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageN00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageN00[i] <- predictedF
}

serv.00.Never.FV <- mean(pred.storageN00)
sd.serv.00.Never.FV <- sd(pred.storageN00)

##Religiosity post-2000 only:
calcsamp <- part.samp[part.samp$year >2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
# attends weekly
calcsamp$religserv1 <- rep(3, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageW00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageW00[i] <- predictedF
}

serv.00.Weekly.FV <- mean(pred.storageW00)
sd.serv.00.Weekly.FV <- sd(pred.storageW00)

#First Differences for church attendance:
FD.00.Religiosity <- serv.00.Never.FV - serv.00.Weekly.FV
difference.00.Religiosity <- pred.storageN00 - pred.storageW00
sd.00.Religiosity <- sd(difference.00.Religiosity)

serv.80.Never.FV
serv.00.Never.FV
serv.80.Weekly.FV
serv.00.Weekly.FV

FD.80.Religiosity
FD.00.Religiosity

##############################################################################################
##First differences for Denomination

##Denomination pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Mainline Prot only
calcsamp$fac11 <- rep(1, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageM80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageM80[i] <- predictedF
}

Denom.80.Main.FV <- mean(pred.storageM80)
sd.Denom.80.Main.FV <- sd(pred.storageM80)

calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Evan Prot only
calcsamp$fac11 <- rep(0, nrow(calcsamp))
calcsamp$fac12 <- rep(1, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageE80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageE80[i] <- predictedF
}

Denom.80.Evan.FV <- mean(pred.storageE80)
sd.Denom.80.Evan.FV <- sd(pred.storageE80)

#First Differences for Denom: pre-80 era
FD.80.Denom <- Denom.80.Main.FV - Denom.80.Evan.FV
difference.FD.80.Denom <- pred.storageM80 - pred.storageE80
sd(difference.FD.80.Denom)
sd.80.Denom <- 0.01237761

##Denomination post00 only:
calcsamp <- part.samp[part.samp$year >2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Mainline Prot only
calcsamp$fac11 <- rep(1, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageM00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageM00[i] <- predictedF
}

Denom.00.Main.FV <- mean(pred.storageM00)
sd.Denom.00.Main.FV <- sd(pred.storageM00)

calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Evan Prot only
calcsamp$fac11 <- rep(0, nrow(calcsamp))
calcsamp$fac12 <- rep(1, nrow(calcsamp))
calcsamp$fac12 <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageE00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageE00[i] <- predictedF
}

Denom.00.Evan.FV <- mean(pred.storageE00)
sd.Denom.00.Evan.FV <- sd(pred.storageE00)

#First Differences for denomination: post-00 era
FD.00.Denom <- Denom.00.Main.FV - Denom.00.Evan.FV
difference.FD.00.Denom <- pred.storageM00 - pred.storageE00
sd.00.Denom <- sd(difference.FD.00.Denom)

FD.80.Denom
FD.00.Denom
FD.80.Religiosity
FD.00.Religiosity

##############################################################################################
##First differences for Undecided

##Undecided pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Undecided only
calcsamp$undecided <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageUD80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageUD80[i] <- predictedF
}


calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Decided only
calcsamp$undecided <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageDD80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageDD80[i] <- predictedF
}

#First Differences for Undecided: pre-80 era
FD.80.Undec <- pred.storageDD80 - pred.storageUD80
sd.FD.80.Undec <- sd(FD.80.Undec)
mean(pred.storageDD80)
mean(FD.80.Undec)
##Undecided post00 only:
calcsamp <- part.samp[part.samp$year >2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$undecided <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageUD00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageUD00[i] <- predictedF
}

calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
#Decided only
calcsamp$undecided <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageDD00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageDD00[i] <- predictedF
}

#First Differences for Undecided: post-00 era
FD.00.Undec <- pred.storageUD00 - pred.storageDD00
mean(FD.00.Undec)
sd.00.Undec <- sd(FD.00.Undec)
mean(pred.storageDD00)
mean(pred.storageUD00)

##############################################################################################
##First differences for Ambivalent

##Ambivalent pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$ambivalent <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageAM80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageAM80[i] <- predictedF
}

calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$ambivalent <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageUA80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageUA80[i] <- predictedF
}

mean(pred.storageUA80)
#First Differences for Ambivalent: pre-80 era
FD.80.Amb <- pred.storageAM80 - pred.storageUA80
sd.80.Amb <- sd(FD.80.Amb)

##ambivalence post00 only:
calcsamp <- part.samp[part.samp$year >2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$ambivalent <- rep(1, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageAM00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageAM00[i] <- predictedF
}

calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$ambivalent <- rep(0, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageUA00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageUA00[i] <- predictedF
}

#First Differences for Undecided: post-00 era
FD.00.Amb <- pred.storageAM00 - pred.storageUA00
sd.00.Amb <- sd(FD.00.Amb)
mean(pred.storageAM00)
mean(pred.storageUA00)

##############################################################################################
##First differences for Age by Sumissue
table(part.samp$age)
quantile(part.samp$sumissue, 0.75)
table(part.samp$agesumissue)

18*0.2  # 3.6
50*0.8 # 40

##AgeSumissue pre-1980 only:
calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$age <- rep(18, nrow(calcsamp))
calcsamp$sumissue <- rep(0.2, nrow(calcsamp))
calcsamp$agesumissue <- rep(3.6, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageAS80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageAS80[i] <- predictedF
}

calcsamp <- part.samp[part.samp$year < 1980, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$age <- rep(50, nrow(calcsamp))
calcsamp$sumissue <- rep(0.8, nrow(calcsamp))
calcsamp$agesumissue <- rep(40, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageASOld80 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageASOld80[i] <- predictedF
}

mean(pred.storageAS80)
mean(pred.storageASOld80)
FD.80.Agesum <- pred.storageASOld80 - pred.storageAS80
sd(FD.80.Agesum)

#AgeSumissue post-2000 only:
calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$age <- rep(18, nrow(calcsamp))
calcsamp$sumissue <- rep(0.2, nrow(calcsamp))
calcsamp$agesumissue <- rep(3.6, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageAS00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageAS00[i] <- predictedF
}

calcsamp <- part.samp[part.samp$year > 2000, ]
calcsamp<- calcsamp[ , -c(2, 20, 21)]
calcsamp$age <- rep(50, nrow(calcsamp))
calcsamp$sumissue <- rep(0.8, nrow(calcsamp))
calcsamp$agesumissue <- rep(40, nrow(calcsamp))
calcsamp <- as.matrix(calcsamp)
head(calcsamp)
pred.storageASOld00 <- NULL

for(i in 1:nrow(calcsamp)){
  predictedF <- exp(calcsamp[i, ] %*% bc[19:36])/(1 + exp(calcsamp[i,] %*% bc[1:18]) + exp(calcsamp[i, ]%*% bc[19:36]) +  exp(calcsamp[i, ] %*% bc[37:54]))
  pred.storageASOld00[i] <- predictedF
}
mean(pred.storageAS00)
mean(pred.storageASOld00)
FD.00.Agesum <- pred.storageASOld00 - pred.storageAS00
mean(FD.00.Agesum)
sd(FD.00.Agesum)

#################################

install.packages("dotwhisker")
install.packages("broom")
install.packages("dplyr")
require(dotwhisker)
require(broom)
require(dplyr)

#order<- c(FD.80.Agesum, FD.00.Agesum, FD.80.Amb, FD.00.Amb, FD.80.Undec, FD.00.Undec, FD.80.Religiosity, FD.00.Religiosity, FD.80.Denom, FD.00.Denom)
estimate<- as.vector(c( 0.007,  0.013 , 0.018, 0.016, 0.023, 0.020, 0.075, 0.065,  0.010, 0.009))
term<- c("Age 18 with Low Issue Recognition to 50 with High" , "Age 18 with Low Issue Recognition to 50 with High" , "Unambivalent to Ambivalent Voter", "Unambivalent to Ambivalent Voter", "Undecided to Decided Voter", "Undecided to Decided Voter", "Never Attends Services to Monthly Attends", "Never Attends Services to Monthly Attends",  "Evangelical to Mainline Protestant", "Evangelical to Mainline Protestant")
reordered_vars <- c("Age 18 with Low Issue Recognition to 50 with High" , "Age 18 with Low Issue Recognition to 50 with High" ,"Unambivalent to Ambivalent Voter", "Unambivalent to Ambivalent Voter","Undecided to Decided Voter", "Undecided to Decided Voter", "Never Attends Services to Weekly Attends",  "Never Attends Services to Monthly Attends",   "Evangelical to Mainline Protestant", "Evangelical to Mainline Protestant")
term2<- c("(pre-1980)" , "Age 18 with Low Issue Recognition to 50 with High (post-2000)" , "(pre-1980)", "Unambivalent to Ambivalent Voter (post-2000)", "(pre-1980)", "Undecided to Decided Voter (post-2000)", "(pre-1980)", "Never Attends Services to Monthly Attends (post-2000)",  "(pre-1980)", "Evangelical to Mainline Protestant (post-2000)")
std.error <- as.vector(c(0.038, 0.054, 0.023, 0.023 , 0.028, 0.028, 0.177, 0.090, 0.012, 0.012))
model <- c("pre-1980", "post-2000", "pre-1980", "post-2000", "pre-1980", "post-2000", "pre-1980", "post-2000", "pre-1980", "post-2000")
Frame1980 <- cbind(term, estimate, std.error, model)
breaks <- c(-0.02, 0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.14, 0.16)
Frame1980 <- as.data.frame(Frame1980)

#FDplot <- dwplot(Frame1980, alpha = 0.1, dodge_size = 0.05, aes(x=estimate, y = Frame1980$term) )+
#  xlab("Change in Predicted Probability of Being a Floating Voter") +
#  ggtitle("First Difference on Key Covariates by Era") +
#  geom_vline(xintercept = 0, col = "gray", lty = 2) +
#  scale_x_discrete(breaks = c(-0.02, 0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.14, 0.16))

dev.off()
y.axis <- c(1, 1.25, 2.75, 3, 4, 4.25, 5.75, 6, 7, 7.25)
par(mar=c(2, 17, 2, 2))
plot(estimate, y.axis , col = rep(c("cadetblue3", "coral2"), 5),type = "p", axes = F, xlab = "", ylab = "", pch = 20, cex = 1.2,
     xlim = c(-0.1, 0.3), xaxs = "r", main = "Change in Predicted Probability \n of Being a Floating Voter", cex.main = 0.7)
#rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = 
#       "gray85", add = T)
segments(estimate - std.error, y.axis, estimate + std.error, y.axis, lwd = 1.5, col = rep(c("cadetblue3", "coral2"), 5))  
abline(v = 0, lty = 2)
axis(1, at = seq(-0.2, 0.4, by= 0.04), labels = seq(-0.2, 0.4, by =0.04), tick = T, cex.axis = 0.7, mgp = c(1, .5, 0))
axis(2, at = y.axis, label = term2, las = 1, tick = T, mgp = c(2, 0.6, 0),
     cex.axis = 0.7)


summary(model3f)

############################################################################################################################
############################################################################################################################
##Graphing predicted values for above models:
require(ggplot2)

##Model 1:
load("PredictTable1.Rdata")
predict.table1 <- as.data.frame(predict.table1)
Predicted_Probability <- c(as.vector(predict.table1$meanvecF), as.vector(predict.table1$meanvecS)) 
Year <- rep(rownames(predict.table1), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table1$upperF), as.vector(predict.table1$upperS))
lower <- c(as.vector(predict.table1$lowerF), as.vector(predict.table1$lowerS))

graphdata1 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata1, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Smidt's Original Model")

##Model 2:
load("PredictTable2.Rdata")
predict.table2 <- as.data.frame(predict.table2)
Predicted_Probability <- c(as.vector(predict.table2$meanvecF), as.vector(predict.table2$meanvecS)) 
Year <- rep(rownames(predict.table2), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table2$upperF), as.vector(predict.table2$upperS))
lower <- c(as.vector(predict.table2$lowerF), as.vector(predict.table2$lowerS))

graphdata2 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata2, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Interacting Age and Recognition of Issue Differences")

##Model 2I:
load("PredictTable2I.Rdata")
predict.table2I <- as.data.frame(predict.table2I)
Predicted_Probability <- c(as.vector(predict.table2I$meanvecF), as.vector(predict.table2I$meanvecS)) 
Year <- rep(rownames(predict.table2I), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table2I$upperF), as.vector(predict.table2I$upperS))
lower <- c(as.vector(predict.table2I$lowerF), as.vector(predict.table2I$lowerS))

graphdata2I <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata2I, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Interacting Age and Recognition of Issue Differences")

##Model 2Ib:
load("PredictTable2I.Rdata")
predict.table2Ib <- as.data.frame(predict.table2Ib)
Predicted_Probability <- c(as.vector(predict.table2Ib$meanvecF), as.vector(predict.table2Ib$meanvecS)) 
Year <- rep(rownames(predict.table2Ib), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table2Ib$upperF), as.vector(predict.table2Ib$upperS))
lower <- c(as.vector(predict.table2Ib$lowerF), as.vector(predict.table2Ib$lowerS))

graphdata2Ib <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata2Ib, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Interacting Age and Recognition of Issue Differences")

##Model 3:
load("PredictTable3.Rdata")
predict.table3 <- as.data.frame(predict.table3)
Predicted_Probability <- c(as.vector(predict.table3$meanvecF), as.vector(predict.table3$meanvecS)) 
Year <- rep(rownames(predict.table3), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table3$upperF), as.vector(predict.table3$upperS))
lower <- c(as.vector(predict.table3$lowerF), as.vector(predict.table3$lowerS))

graphdata3 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata3, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Adding Religiosity Variable (No Age/Issue Difference Interaction)")

##Model 4:
load("PredictTable4.Rdata")
predict.table4 <- as.data.frame(predict.table4)
Predicted_Probability <- c(as.vector(predict.table4$meanvecF), as.vector(predict.table4$meanvecS)) 
Year <- rep(rownames(predict.table4), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table4$upperF), as.vector(predict.table4$upperS))
lower <- c(as.vector(predict.table4$lowerF), as.vector(predict.table4$lowerS))

graphdata4 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata4, aes(x=Year, y=Predicted_Probability, colour = Type, group = group)) +
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Adding Religious Denomination Variable (No Age/Issue Difference Interaction)")

##Model 5:
load("PredictTable5.Rdata")
predict.table5 <- as.data.frame(predict.table5)
Predicted_Probability <- c(as.vector(predict.table5$meanvecF), as.vector(predict.table5$meanvecS)) 
Year <- rep(rownames(predict.table5), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table5$upperF), as.vector(predict.table5$upperS))
lower <- c(as.vector(predict.table5$lowerF), as.vector(predict.table5$lowerS))

graphdata5 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata5, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Adding Religiosity and Religious Denomination Variables (No Age/Issue Difference Interaction)")

##Model 6:
load("PredictTable6.Rdata")
predict.table6 <- as.data.frame(predict.table6)
Predicted_Probability <- c(as.vector(predict.table6$meanvecF), as.vector(predict.table6$meanvecS)) 
Year <- rep(rownames(predict.table6), 2)
group <- c(rep("FV", 12), rep ("SP", 12))
Type <- c(rep("Floating Voter", 12), rep("Standpatter", 12))
upper <- c(as.vector(predict.table6$upperF), as.vector(predict.table6$upperS))
lower <- c(as.vector(predict.table6$lowerF), as.vector(predict.table6$lowerS))

graphdata6 <- data.frame(estimate = Predicted_Probability, year = Year, ymin= lower, ymax = upper, groupvars = group, colour = Type)
pd <- position_dodge(0.2)
ggplot(graphdata6, aes(x=Year, y=Predicted_Probability, colour = Type, group = group) )+
  geom_line(position = pd) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin=lower, ymax=upper), position = pd, width=.1) +
  ggtitle("Model Adding Religion Variables, Interacting Age and Recognition of Issue Differences")

##################################################################################################################################################
##################################################################################################################################################
##Sensitivity Analysis for Across-Era Effects at different Cut-offs: 

##Regaining Smidt's original data:
statadata.orig <- statadata[  , c(1, 3, 6,  46, 47, 48, 49, 51, 55, 57, 58, 60, 72, 73, 80)]
head(statadata.orig)

statadata.orig$Newcatvoter2 <- as.numeric(statadata.orig$catvoter2)
statadata.orig$Newcatvoter2[statadata.orig$Newcatvoter2== 1] <- 1
statadata.orig$Newcatvoter2[statadata.orig$Newcatvoter2== 2] <- 0
statadata.orig$Newcatvoter2[statadata.orig$Newcatvoter2== 3] <- 2
statadata.orig$Newcatvoter2[statadata.orig$Newcatvoter2== 4] <- 3
table(statadata.orig$Newcatvoter2)

frame1 <- mlogit.data(statadata.orig, shape="wide", choice="Newcatvoter2")
model1 <- mlogit(Newcatvoter2 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame1, weights = weight)
summary(model1)

samp.orig <- na.omit(statadata.orig)
nrow(samp.orig)
sum <- sum(samp.orig$weight)
samp.orig$weight2 <- samp.orig$weight*(nrow(samp.orig)/sum)
head(samp.orig$weight2)
summary(samp.orig$weight2) 

frame.o <- mlogit.data(samp.orig, shape="wide", choice="Newcatvoter2")
model3o <- mlogit(Newcatvoter2 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame.o, weights = weight2)
summary(model3o)
actual <- model3o$coefficients[1:39]

##Getting bootstrapped coefficients:
bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs.orig  <- matrix(NA, nrow = reps, ncol = 39)
set.seed(1004)

clusterboot <- for(i in 1:reps){
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(samp.orig$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  for(j in 1:length(inds)){
    onecluster <- samp.orig[as.character(samp.orig$year) == inds[j], ] 
    bootdat <- rbind(bootdat, onecluster)
  }
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter2", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter2 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp, data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs.orig[i, ] <- t(as.matrix(coeffs.long[1:39]))
}

save(coeffs.orig, file = "OriginalCoeffsforSensitivity.Rdata")
dim(coeffs.orig)

load("OriginalCoeffsforSensitivity.Rdata")
head(coeffs.orig)
coeffsB <- coeffs.orig
coeffsB <- na.omit(coeffsB)
expected.coeffs <- apply(coeffsB, 2, mean)
bias <- actual - expected.coeffs

bc.coeffs2 <- matrix(NA, nrow(coeffsB), ncol(coeffsB))
for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}
bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 
head(bcsimbetas)

##getting weighted means for relevant data:
head(samp.orig)
part.samp <- samp.orig[ , c(1, 9, 4, 5, 6, 7, 13, 8, 15, 3, 14, 12, 11, 16, 17)]
head(part.samp)
intercept <- rep(1, nrow(part.samp)) 
part.samp <- cbind(intercept, part.samp)

## 80 00 cutoffs
wmeans.c80 <- apply(part.samp[part.samp$year < 1980, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1980])
wmeans.c00 <- apply(part.samp[part.samp$year > 2000, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 2000])
wmeans.c80 <- wmeans.c80[-c(2, 15, 16)]
wmeans.c00 <- wmeans.c00[-c(2, 15, 16)]
length(wmeans.c80)
wmeans.c00

## 70 92 cutoffs
wmeans.c70 <- apply(part.samp[part.samp$year < 1970, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1970])
wmeans.c92 <- apply(part.samp[part.samp$year > 1992, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 1992])
wmeans.c70 <- wmeans.c70[-c(2, 15, 16)]
wmeans.c92 <- wmeans.c92[-c(2, 15, 16)]
length(wmeans.c70)
wmeans.c92

## 90 cutoff
wmeans.c89 <- apply(part.samp[part.samp$year < 1990, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1990])
wmeans.c90 <- apply(part.samp[part.samp$year > 1990, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 1990])
wmeans.c89 <- wmeans.c89[-c(2, 15, 16)]
wmeans.c90 <- wmeans.c90[-c(2, 15, 16)]
length(wmeans.c89)
wmeans.c90

##Forloop for predicted values...AEE of each variable with observed data, change in mean from pre80 to post00:
wmeans.c80
wmeans.c00

head(bcsimbetas)

ae.effect <- NULL
sd.effect <- NULL
pvalue.effect <- NULL

for(i in 1:13){
  calcdata <- part.samp[ , - c(2, 15, 16)]
  calcdata[ , i] <- rep(wmeans.c80[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  head(calcdata)
  pred80 <- rep(NA, nrow(bcsimbetas))
  for(j in 1:nrow(bcsimbetas)){
    predict80  <- exp(calcdata %*% bcsimbetas[j, 14:26])/(1 + exp(calcdata %*% bcsimbetas[j, 1:13]) + exp(calcdata%*% bcsimbetas[j, 14:26]) +  exp(calcdata %*% bcsimbetas[j, 27:39]))
    pred80[j] <- predict80
  }
  calcdata <-  (part.samp[ , - c(2, 15, 16)])
  calcdata[ , i] <- rep(wmeans.c00[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  pred00 <- rep(NA, nrow(bcsimbetas))
  for(k in 1:nrow(bcsimbetas)){
    predict00 <- exp(calcdata %*% bcsimbetas[k, 14:26])/(1 + exp(calcdata %*% bcsimbetas[k, 1:13]) + exp(calcdata%*% bcsimbetas[k, 14:26]) +  exp(calcdata %*% bcsimbetas[k, 27:39]))
    pred00[k] <- predict00
  } 
  effect.vec <- pred00 - pred80  
  ae.effect[i] <- mean(na.omit(effect.vec))
  sd.effect[i] <- sd(na.omit(effect.vec))
  ate <- mean(na.omit(effect.vec))
  ate.SE <-  sd(na.omit(effect.vec))
  tstat <- (ate-0)/ate.SE
  pvalue.effect[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

AEE.orig <- cbind(names(model3o$coefficients), ae.effect, sd.effect, pvalue.effect)
save(AEE.orig, file = "AEEOriginal.Rdata")

################################################################ 
#################################################################
##AEE of each variable with observed data, change in mean from pre70 to post92:
wmeans.c70
wmeans.c92
head(part.samp)
calcdata <- part.samp
pred70 <- NULL
pred92 <- NULL
ae.effect2 <- NULL
sd.effect2 <- NULL
pvalue.effect2 <- NULL

for(i in 1:13){
  calcdata <- (part.samp[ , - c(2, 15, 16)])
  calcdata[ , i] <- rep(wmeans.c70[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(j in 1:nrow(bcsimbetas)){
    predict70 <- exp(calcdata %*% bcsimbetas[j, 14:26])/(1 + exp(calcdata %*% bcsimbetas[j, 1:13]) + exp(calcdata%*% bcsimbetas[j, 14:26]) +  exp(calcdata %*% bcsimbetas[j, 27:39]))
    pred70[j] <- predict70
  }
  calcdata <-  (part.samp[ , - c(2, 15, 16)])
  calcdata[ , i] <- rep(wmeans.c92[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(k in 1:nrow(bcsimbetas)){
    predict92 <- exp(calcdata %*% bcsimbetas[k, 14:26])/(1 + exp(calcdata %*% bcsimbetas[k, 1:13]) + exp(calcdata%*% bcsimbetas[k, 14:26]) +  exp(calcdata %*% bcsimbetas[k, 27:39]))
    pred92[k] <- predict92
  }
  effect.vec <- pred92 - pred70  
  ae.effect2[i] <- mean(effect.vec)
  sd.effect2[i] <- sd(effect.vec)
  ate <- mean(effect.vec)
  ate.SE <-  sd(effect.vec)
  tstat <- (ate-0)/ate.SE
  pvalue.effect2[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

AEE.72 <- cbind(names(model3o$coefficients), ae.effect2, sd.effect2, pvalue.effect2)
save(AEE.72, file = "AEETable72.Rdata")

##Forloop for predicted values...AEE of each variable with observed data, change in mean from pre89 to post90:
wmeans.c89
wmeans.c90
head(part.samp)
calcdata <- part.samp
pred89 <- NULL
pred90 <- NULL
ae.effect3 <- NULL
sd.effect3 <- NULL
pvalue.effect3 <- NULL

for(i in 1:13){
  calcdata <- (part.samp[ , - c(2, 15, 16)])
  calcdata[ , i] <- rep(wmeans.c89[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(j in 1:nrow(bcsimbetas)){
    pred89[j] <- exp(calcdata %*% bcsimbetas[j, 14:26])/(1 + exp(calcdata %*% bcsimbetas[j, 1:13]) + exp(calcdata%*% bcsimbetas[j, 14:26]) +  exp(calcdata %*% bcsimbetas[j, 27:39]))
  }
  calcdata <-  (part.samp[ , - c(2, 15, 16)])
  calcdata[ , i] <- rep(wmeans.c90[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(k in 1:nrow(bcsimbetas)){
    pred90[k] <- exp(calcdata %*% bcsimbetas[k, 14:26])/(1 + exp(calcdata %*% bcsimbetas[k, 1:13]) + exp(calcdata%*% bcsimbetas[k, 14:26]) +  exp(calcdata %*% bcsimbetas[k, 27:39]))
  } 
  effect.vec <- pred90 - pred89
  ae.effect3[i] <- mean(effect.vec)
  sd.effect3[i] <- sd(effect.vec)
  ate <- mean(effect.vec)
  ate.SE <-  sd(effect.vec)
  tstat <- (ate-0)/ate.SE
  pvalue.effect3[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

AEE.90 <- cbind(names(model3o$coefficients), ae.effect3, sd.effect3, pvalue.effect3)
save(AEE.90, file = "AEETable90.Rdata")

AEE.orig
AEE.72
AEE.90


###########################################################################################
###########################################################################################
#Exploring Across-Era Effects with Matching Methods:

#############################################################
##Developing treated and control groups for pre-80 and post-20 era. 
samp$era <- NA
samp$era[samp$year < 1980] <- 0
samp$era[samp$year > 2000] <- 1
table(samp$era)

matchsamp <- samp[ , c(1, 3, 4,  6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 20, 21, 22, 23)]
head(matchsamp)
matchsamp <- na.omit(matchsamp)
matchsamp <- as.data.frame(matchsamp)
nrow(matchsamp)
rownamevec <- rownames(matchsamp)

######################################################3
###Trying Mahalanobis Distance Matching:

head(matchsamp)
X <- matchsamp[ , -c(1, 3, 15, 16)]
head(X)

mat <- as.matrix(X)
s <- cov(mat)  

func <- function(X1, X2, s){  
  return( sqrt( t(as.matrix(X1 - X2)) %*% solve(s) %*% as.matrix(X1- X2) ) )
}    

treat4 <- matchsamp[matchsamp$era ==1, ]
control4 <- matchsamp[matchsamp$era ==0, ]
nrow(treat4)
nrow(control4)

store4 <- rep(NA, nrow(treat4))

for(i in 1:nrow(treat4)){ 
  X1 <- treat4[i, ]
  X1 <- X1[-c(1, 3, 15, 16)]
  X1 <- as.vector(X1)
  X1 <- t(X1)
  
  val <- NULL
  
  for(j in 1:nrow(control4)){
    X2<- control4[j,]
    X2 <- X2[-c(1, 3, 15, 16)]
    X2 <- t(as.vector(X2))
    compare <- func(X1, X2, s)
    val[j] <- compare
  }
  
  store4[i] <- which(val == min(val))
}

head(store4)
save(store4, file = "MatchedIndex.Rdata")

newcontrol4 <- control4[store4,  ]
new4 <- rbind(treat4, newcontrol4)
head(new4)

save(new4, file = "MDMatched.Rdata")

summary(new4)
nrow(new4)

##reruning the model with the matched data:

matchedframe <- mlogit.data(new4, shape="wide", choice="Newcatvoter3")
matchedmodel <- mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv + factor(religfine3), data = matchedframe, weights = weight2)
summary(matchedmodel)
length(matchedmodel$coefficients)

bootdat <- NULL
onecluster <- NULL
indexdat <- NULL
reps <- 2000
coeffs.m  <- matrix(NA, nrow = reps, ncol = 51)

set.seed(1004)

clusterboot <- for(i in 1:reps){
  
  bootdat <- NULL  
  model.sim <- NULL
  clusters <- names(table(new4$year))
  draws <- sample( 1:length(clusters), length(clusters), replace=TRUE)  
  inds <- clusters[draws]
  print(i)
  
  for(j in 1:length(inds)){
    onecluster <- samp[as.character(new4$year) == inds[j], ]  ###NEED TO CHANGE THIS SAMP AND RECREATE.
    bootdat <- rbind(bootdat, onecluster)
  }
  
  frame.sim <- mlogit.data(bootdat, choice = "Newcatvoter3", shape = "wide")
  model.sim <- tryCatch(mlogit(Newcatvoter3 ~ 0 | efficacy + education + age + female + black + totalmentions + pidstrength2 + sumissue + undecided + ambivalent + changerdpi + reelectcamp + religserv + factor(religfine3), data = frame.sim, weights = weight2),
                        error = function(e){NA})
  if (is.na(model.sim)){i = i+1 }else{coeffs.long <- as.data.frame(model.sim$coefficients)[ , 1]}
  coeffs.m[i, ] <- t(as.matrix(coeffs.long[1:51]))
  
}

head(coeffs.m)
save(coeffs.m, file = "NewCoeffsM.Rdata")

coeffsB<- as.matrix(coeffs.m)
coeffsB <- na.omit(coeffsB)
dim(coeffsB)

expected.coeffs <- NULL
expected.coeffs <- apply(coeffsB, 2, mean)

##Model Standard Errors
sterrs <- NULL

for(i in 1:ncol(coeffsB)){ 
  vector <-coeffsB[, i]  
  sterrs[i] <- sd(vector)
}

##Model P-Values:
pvalue <- NULL

for(i in 1:51){
  ate <- expected.coeffs[i]
  ate.SE <-  sterrs[i]
  tstat <- (ate-0)/ate.SE
  pvalue[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

pvalue
names(pvalue) <- names(matchedmodel$coefficients)

##Bias calculations:

original.coeffs <- as.data.frame(matchedmodel$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:51])
expected.vector <- as.vector(expected.coeffs)

bias <- original.coeffs - expected.vector
bc <- expected.vector + as.vector(bias)

##Table of coefficients, bootstrapped SEs, and p-values (where the 0-2 difference is the Standpatter to Floating Voter MNL estimate):

table <- cbind( names(matchedmodel$coefficients), expected.coeffs, bc, sterrs, pvalue)
colnames(table) <- c( "" , "Bootstrapped Coefficient", "Bias-Corrected Coefficient", "Standard Error", "P-Value" )
table

save(table, file = "MatchedCoeffTable.Rdata")

######################################################

##Getting ready to find predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients

head(new4)
part.samp <- new4[ , c(1, 9, 4, 5, 6, 7, 12, 8, 14, 2, 13, 11, 10, 15, 16, 3 )]
head(part.samp)
intercept <- rep(1, nrow(part.samp)) 
new4$fac11 <- rep(0, nrow(new4))
new4$fac11[new4$religfine3 == 11] <- 1
new4$fac12 <- rep(0, nrow(new4))
new4$fac12[new4$religfine3 == 12] <- 1
new4$fac20 <- rep(0, nrow(new4))
new4$fac20[new4$religfine3 == 20] <- 1

part.samp <- cbind(intercept, part.samp, new4$fac11, new4$fac12, new4$fac20) 

names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "Newcatvoter2", "weight2", "religserv", "relig11", "relig12", "relig20")
dim(part.samp)
head(part.samp)
######

bc.coeffs2 <- matrix(NA, nrow(coeffsB), 51)
for(i in 1:2000){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}

bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51)]
bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 
head(bcsimbetas)

wmeans.c80 <- apply(part.samp[part.samp$year < 1980, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1980])
wmeans.c00 <- apply(part.samp[part.samp$year > 2000, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 2000])
wmeans.c80 <- wmeans.c80[-c(2, 15, 16)]
wmeans.c00 <- wmeans.c00[-c(2, 15, 16)]
length(wmeans.c80)
wmeans.c00

##Forloop for predicted values...overal model at each era's means with matched sample:

for(j in 1:2000){ 
  predicted80 <- exp(wmeans.c80 %*% bcsimbetas[j, 18:34])/(1 + exp(wmeans.c80 %*% bcsimbetas[j, 1:17]) + exp(wmeans.c80%*% bcsimbetas[j, 18:34]) +  exp(wmeans.c80 %*% bcsimbetas[j, 35:51]))
  predicted00 <- exp(wmeans.c00 %*% bcsimbetas[j, 18:34])/(1 + exp(wmeans.c80 %*% bcsimbetas[j, 1:17]) + exp(wmeans.c00%*% bcsimbetas[j, 18:34]) +  exp(wmeans.c00 %*% bcsimbetas[j, 35:51]))
  pred.storage[j] <- predicted00- predicted80
} 
meanvec <- mean(pred.storage)
sdvec <- sd(pred.storage)
ate <- mean(pred.storage)
ate.SE <-  sd(pred.storage)
tstat <- (ate-0)/ate.SE
pvalvec <- 2*(1 - pnorm(abs(tstat), 0, 1))
sdvec
pvalvec
lower <- meanvec - sdvec
upper <- meanvec + sdvec

##Overall model across era effect for matched data:  -0.05920436

##Forloop for predicted values...AEE of each variable with matched data as observed data, change in mean from pre80 to post00:

#observed values for all covariates in matched sample:
wmeans.c80
wmeans.c00
head(part.samp)
calcdata <- (part.samp[ , c(1, 3:14, 17:20)])
head(calcdata)
ncol(calcdata)

pred80 <- NULL
pred00 <- NULL
ae.effect <- NULL
sd.effect <- NULL
pvalue.effect <- NULL

for(i in 1:17){
  
  calcdata <- (part.samp[ , c(1, 3:14, 17:20)])
  calcdata[ , i] <- rep(wmeans.c80[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  
  for(j in 1:2000){
    pred80[j] <- exp(calcdata %*% bcsimbetas[j, 18:34])/(1 + exp(calcdata %*% bcsimbetas[j, 1:17]) + exp(calcdata%*% bcsimbetas[j, 18:34]) +  exp(calcdata %*% bcsimbetas[j, 35:51]))
  }
  
  calcdata <- (part.samp[ , c(1, 3:14, 17:20)])
  calcdata[ , i] <- rep(wmeans.c00[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(k in 1:2000){
    pred00[k] <- exp(calcdata %*% bcsimbetas[k, 18:34])/(1 + exp(calcdata %*% bcsimbetas[k, 1:17]) + exp(calcdata%*% bcsimbetas[k, 18:34]) +  exp(calcdata %*% bcsimbetas[k, 35:51]))
  } 
  effect.vec <- pred00 - pred80  
  ae.effect[i] <- mean(effect.vec)
  sd.effect[i] <- sd(effect.vec)
  ate <- mean(effect.vec)
  ate.SE <-  sd(effect.vec)
  tstat <- (ate-0)/ate.SE
  pvalue.effect[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

AEEMatched <- cbind(colnames(calcdata), ae.effect, sd.effect, pvalue.effect)
save(AEEMatched, file = "AEEMatchedTable.Rdata")

#########################################################################
###Across-Era effects for Model 6:

load("NewCoeffs6.Rdata")

head(coeffs6)
dim(coeffs6)

original.coeffs <- as.data.frame(model3f$coefficients)
original.coeffs <- original.coeffs[ , 1]
original.coeffs <- as.vector(original.coeffs[1:54])
expected.coeffs <- apply(na.omit(coeffs6), 2, mean)
expected.vector <- as.vector(expected.coeffs)

bias <- original.coeffs - expected.vector

#####################################################

##Getting ready to find predicted values: pulling relevant covariates; doing bias-correction across all bootstrapped coefficients

head(samp)
part.samp <- samp[ , c(1, 11, 6, 7, 8, 9, 15, 10, 17, 3, 16, 14, 13, 27, 24, 25, 26, 28, 20, 21)]
dim(part.samp)
intercept <- rep(1, nrow(part.samp)) 

part.samp <- cbind(intercept, part.samp) 
head(part.samp)

names(part.samp) <- c("intercept", "year", "efficacy", "education", "age", "female", "black", "totalmentions", "pidstrength2", 
                      "sumissue", "undecided", "ambivalent", "changerpdi", "reelectcamp", "religserv1", "fac11", "fac12", "fac20", "religserv1:sumissue", "Newcatvoter3", "weight2")

######
bc.coeffs2 <- matrix(NA, nrow(coeffsB), 54)

for(i in 1:nrow(coeffsB)){
  bc.coeffs2[i, ] <-  coeffsB[i, ] + bias
}

bc.SR <- bc.coeffs2[ , c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52)]
bc.SF <- bc.coeffs2[ , c(2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53)]
bc.SS <- bc.coeffs2[ , c(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54)]

bcsimbetas <- as.matrix(cbind(bc.SR, bc.SF, bc.SS)) 

##Forloop for predicted values...
#observed values for all covariates in matched sample:

wmeans.c80.2 <-  apply(part.samp[part.samp$year < 1980, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1980])
wmeans.c00.2 <-  apply(part.samp[part.samp$year > 2000, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 2000])

wmeans.c80.2 <-  wmeans.c80.2[-c(2, 20, 21)]  
wmeans.c00.2 <-  wmeans.c00.2[-c(2, 20, 21)]  

head(part.samp)
calcdata <- part.samp[ , -c(2, 20,21)]
head(calcdata)
ncol(calcdata)

pred80 <- NULL
pred00 <- NULL
ae.effect.R <- NULL
sd.effect.R <- NULL
pvalue.effect.R <- NULL

for(i in 1:18){
  calcdata <-  part.samp[ , -c(2, 20, 21)]
  calcdata[ , i] <- rep(wmeans.c80.2[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(j in 1:nrow(bcsimbetas)){
    predict80 <- exp(calcdata %*% bcsimbetas[j, 19:36])/(1 + exp(calcdata %*% bcsimbetas[j, 1:18]) + exp(calcdata%*% bcsimbetas[j, 19:36]) +  exp(calcdata %*% bcsimbetas[j, 37:54]))
    pred80[j] <- predict80
  }
  calcdata <- part.samp[ , -c(2, 20,21)]
  calcdata[ , i] <- rep(wmeans.c00.2[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(k in 1:nrow(bcsimbetas)){
    predict00 <- exp(calcdata %*% bcsimbetas[k, 19:36])/(1 + exp(calcdata %*% bcsimbetas[k, 1:18]) + exp(calcdata%*% bcsimbetas[k, 19:36]) +  exp(calcdata %*% bcsimbetas[k, 37:54]))
    pred00[k] <- predict00
  } 
  effect.vec <- pred00 - pred80  
  ae.effect.R[i] <- mean(na.omit(effect.vec))
  sd.effect.R[i] <- sd(na.omit(effect.vec))
  ate <- mean(na.omit(effect.vec))
  ate.SE <-  sd(na.omit(effect.vec))
  tstat <- (ate-0)/ate.SE
  pvalue.effect.R[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}
AEE.Relig <- cbind(names(model3f$coefficients), ae.effect.R, sd.effect.R, pvalue.effect.R)

###Repeat for pre- post-90:
wmeans.c89.2 <-  apply(part.samp[part.samp$year < 1989, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year < 1989])
wmeans.c90.2 <-  apply(part.samp[part.samp$year > 1990, ], 2,  weighted.mean, w = part.samp$weight2[part.samp$year > 1990])
wmeans.c89.2 <-  wmeans.c89.2[-c(2, 20, 21)]  
wmeans.c90.2 <-  wmeans.c90.2[-c(2, 20, 21)]  

head(part.samp)
calcdata <- part.samp[ , -c(2, 20,21)]
head(calcdata)
ncol(calcdata)

pred89 <- NULL
pred90 <- NULL
ae.effect.R90 <- NULL
sd.effect.R90 <- NULL
pvalue.effect.R90 <- NULL

for(i in 1:18){
  calcdata <-  part.samp[ , -c(2, 20, 21)]
  calcdata[ , i] <- rep(wmeans.c89.2[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(j in 1:nrow(bcsimbetas)){
    predict89 <- exp(calcdata %*% bcsimbetas[j, 19:36])/(1 + exp(calcdata %*% bcsimbetas[j, 1:18]) + exp(calcdata%*% bcsimbetas[j, 19:36]) +  exp(calcdata %*% bcsimbetas[j, 37:54]))
    pred89[j] <- predict89
  }
  calcdata <- part.samp[ , -c(2, 20,21)]
  calcdata[ , i] <- rep(wmeans.c90.2[i], nrow(calcdata)) #replace each column with a covariate mean
  calcdata <- as.matrix(calcdata)
  for(k in 1:nrow(bcsimbetas)){
    predict90 <- exp(calcdata %*% bcsimbetas[k, 19:36])/(1 + exp(calcdata %*% bcsimbetas[k, 1:18]) + exp(calcdata%*% bcsimbetas[k, 19:36]) +  exp(calcdata %*% bcsimbetas[k, 37:54]))
    pred90[k] <- predict90
  } 
  effect.vec <- pred90 - pred89  
  ae.effect.R90[i] <- mean(na.omit(effect.vec))
  sd.effect.R90[i] <- sd(na.omit(effect.vec))
  ate <- mean(na.omit(effect.vec))
  ate.SE <-  sd(na.omit(effect.vec))
  tstat <- (ate-0)/ate.SE
  pvalue.effect.R90[i] <- 2*(1 - pnorm(abs(tstat), 0, 1))
}

AEE.Relig90 <- cbind(names(model3f$coefficients), ae.effect.R90, sd.effect.R90, pvalue.effect.R90)

#############################################################################################
##Predicted values for Era interaction model- binary for pre- and post-1990
head(samp)
table(era) <- samp$era
head(part.samp)
calcdata <- part.samp[ , -c(2, 15:19, 20, 21)]
calcdata <- as.matrix(calcdata)
calcdata <- cbind(calcdata, (rep(0, nrow(calcdata))))
head(calcdata)
dim(calcdata)

##pre-90 era only

bcsimbetas <- model3I$coefficients[1:42]  
pred90 <- NULL  

for(i in 1:nrow(calcdata)){
  predict90 <- exp(calcdata[i, ] %*% bcsimbetas[15:28])/(1 + exp(calcdata[i, ] %*% bcsimbetas[1:14]) + exp(calcdata[i, ] %*% bcsimbetas[15:28]) +  exp(calcdata[i, ] %*% bcsimbetas[29:42]))
  pred90[i] <- predict90
} 

mean(pred90) # 2.834436e-08
sd(pred90) # 6.904524e-07

#post90 era
calcdata <- part.samp[ , -c(2, 15:19, 20, 21)]
calcdata <- as.matrix(calcdata)
calcdata <- cbind(calcdata, (rep(1, nrow(calcdata))))
head(calcdata)
dim(calcdata)

bcsimbetas <- model3I$coefficients[1:42]  
pred91 <- NULL  

for(i in 1:nrow(calcdata)){
  predict91 <- exp(calcdata[i, ] %*% bcsimbetas[15:28])/(1 + exp(calcdata[i, ] %*% bcsimbetas[1:14]) + exp(calcdata[i, ] %*% bcsimbetas[15:28]) +  exp(calcdata[i, ] %*% bcsimbetas[29:42]))
  pred91[i] <- predict91
} 

mean(pred91)
sd(pred91)

mean(pred91 - pred90)
